library(Seurat)
## Registered S3 method overwritten by 'spatstat.core':
## method from
## formula.glmmPQL MASS
## Attaching SeuratObject
## Attaching sp
library(ggplot2)
library(Rtsne)
library(rgl)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl.init' failed, running with 'rgl.useNULL = TRUE'.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
library(RColorBrewer)
library(kohonen)
Completed table
# load("Neftel 500 6000.RData")
# #Reading output from Differential Expression
# neftel.markers <- readRDS("NeftelDElist.RDS")
# #Reading the outputfrom seurat cell cycle phase assignment
# neftel.cellcycle <- readRDS("NeftelCellScoringUPDATED.RDS")
exp.matFINAL <- read.table(file = "leblancmatrix2002000(scaled).txt", header = TRUE, as.is = TRUE, row.names = 1)
#View(neftel.markers)
# # Cut down expression table to just DE genes
# exp.matDE <- exp.matFINAL[rownames(exp.matFINAL) %in% leblanc.markers$gene,]
# #Obtain filtered cells and phases
# cellcycle.assignment<-data.frame(sort(neftel.cellcycle$Phase))
# #Cut down expression table by relevant cells only
# exp.matDEfinal <- exp.matDE[colnames(exp.matDE) %in% rownames(cellcycle.assignment)]
# #Order cells by phase
# colnames(exp.matDEfinal) <- intersect(rownames(cellcycle.assignment),colnames(exp.matDEfinal))
###############################################
#Average expression table and relevant plots
###############################################
load("Leblanc 200 2000.RData")
# Find the average expression of the genes in each phase
leblancavg <- AverageExpression(leblanc,slot = "scale.data")
# Filter the average expression table down to just the DE genes
leblancavg <- as.data.frame(leblancavg)
length(intersect(rownames(leblancavg),leblanc.markers$gene)) #184
## [1] 184
leblancavg <- leblancavg[rownames(leblancavg) %in% leblanc.markers$gene,]
###PCA CLUSTERING
leblancpca <- prcomp(leblancavg)
pca.scores <- data.frame(leblancpca$x)
leblanc.val <- cbind(leblancavg,pca.scores)
head(leblanc.val)
## RNA.G2M RNA.G1 RNA.S PC1 PC2 PC3
## STMN1 1.0712511 -0.4369753 0.43561460 -0.07955348 0.2566957 1.528159e-07
## HMGN2 1.2685131 -0.4806220 0.41208763 -0.28128889 0.2826509 1.917924e-07
## CLSPN 0.4336551 -0.4484289 0.94143266 0.64749697 0.6226320 -3.819931e-07
## CDCA8 1.4037896 -0.3483567 -0.06105879 -0.49938170 -0.1778889 6.532607e-07
## CDC20 1.6650299 -0.3774175 -0.17320052 -0.78067800 -0.2282792 7.819159e-07
## KIF2C 1.4195372 -0.3682884 -0.01659434 -0.50739970 -0.1273117 6.122882e-07
### SOM CLUSTERING
leblanc.val$gene <- rownames(leblanc.val)
leblanc.val <- leblanc.val %>% select(gene, everything())
som.data <- as.matrix(leblanc.val[,c(2:4)])
set.seed(2023)
som.data <- som(som.data, grid = somgrid(6, 6, "hexagonal"))
summary(som.data)
## SOM of size 6x6 with a hexagonal topology and a bubble neighbourhood function.
## The number of data layers is 1.
## Distance measure(s) used: sumofsquares.
## Training data included: 184 objects.
## Mean distance to the closest unit in the map: 0.007.
plot(som.data, main = "leblanc data",shape="straight")
## Warning in par(opar): argument 1 does not name a graphical parameter
# TACC3 cluster list
somclusterlist <- data.frame(rownames(leblanc.val),som.data$unit.classif)
grep("^23$",somclusterlist$som.data.unit.classif,value =T)
## [1] "23" "23" "23" "23"
TACC3list.leblanc <- somclusterlist[grep("^23$",somclusterlist$som.data.unit.classif),]
## use hierarchical clustering to cluster
som.data$codes <- as.data.frame(som.data$codes)
som_cluster <- cutree(hclust(dist(som.data$codes)),k = 5)
# plot these results:
plot(som.data, type="mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som.data, som_cluster)
### 3D scatter plot
#plot3d(leblancavg)
#rglwidget()
#Dotplot of TACC3 cluster
DotPlot(leblanc, features = TACC3list.leblanc$rownames.leblanc.val. ,dot.scale = 8) + RotatedAxis()
## Warning: Scaling data with a low number of groups may produce misleading results
SOM clustering on all genes
leblancavgall <- AverageExpression(leblanc,slot = "scale.data")
leblancavgall <- as.data.frame(leblancavgall)
#PCA CLUSTERING
leblancallpca <- prcomp(leblancavgall)
allpca.scores <- data.frame(leblancallpca$x)
leblanc.valall <- cbind(leblancavgall,allpca.scores)
head(leblanc.valall)
## RNA.G2M RNA.G1 RNA.S PC1 PC2
## FO538757.2 -0.007467288 0.010400026 -0.023757525 -0.030333729 0.0178150679
## AP006222.2 -0.078484560 0.071572920 -0.143375748 -0.157527978 0.1009754532
## RP4-669L17.10 -0.022351914 -0.004044557 -0.013357627 -0.034068039 -0.0007278859
## RP11-206L10.9 -0.007157648 -0.001265398 0.007934584 -0.014111910 -0.0118040256
## LINC00115 -0.034101518 0.017724276 -0.026860368 -0.056923574 0.0097439200
## FAM41C 0.020441711 0.003944667 -0.030463548 -0.006612753 0.0351644067
## PC3
## FO538757.2 0.0014726278
## AP006222.2 0.0017340089
## RP4-669L17.10 -0.0119649964
## RP11-206L10.9 0.0011124561
## LINC00115 0.0007717643
## FAM41C 0.0001050152
# SOM CLUSTERING
leblanc.valall$gene <- rownames(leblanc.valall)
leblanc.valall <- leblanc.valall %>% select(gene, everything())
som.data <- as.matrix(leblanc.valall[,c(2:4)])
set.seed(2023)
som.data <- som(som.data, grid = somgrid(6, 6, "hexagonal"))
summary(som.data)
## SOM of size 6x6 with a hexagonal topology and a bubble neighbourhood function.
## The number of data layers is 1.
## Distance measure(s) used: sumofsquares.
## Training data included: 15234 objects.
## Mean distance to the closest unit in the map: 0.002.
plot(som.data, main = "leblanc data",shape="straight")
## Warning in par(opar): argument 1 does not name a graphical parameter
somclusterlist <- data.frame(rownames(leblanc.valall),som.data$unit.classif)
grep("^31$",somclusterlist$som.data.unit.classif,value =T)
## [1] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [16] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [31] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
## [46] "31" "31" "31" "31" "31" "31" "31" "31" "31" "31" "31"
TACC3listall.leblanc <- somclusterlist[grep("^31$",somclusterlist$som.data.unit.classif),]
## use hierarchical clustering to cluster the codebook vectors
som.data$codes <- as.data.frame(som.data$codes)
som_cluster <- cutree(hclust(dist(som.data$codes)),k = 5)
# plot these results:
plot(som.data, type="mapping", bgcol = som_cluster, main = "Clusters")
add.cluster.boundaries(som.data, som_cluster)
length(intersect(TACC3list.leblanc$rownames.leblanc.val.,TACC3listall.leblanc$rownames.leblanc.valall.)) # 2 (half DE genes)
## [1] 2
Complex Heatmap - SOM derived TACC3 list for average expression (leblanc)
# library(ComplexHeatmap)
# library(circlize)
# library(GetoptLong)
#
# #Loading the final expression matrix for scaled neftel data
# #exp.matFINAL <- read.table(file = "", header = TRUE, as.is = TRUE, row.names = 1)
#
# # Cut down expression table to just TACC3 related genes from SOM Clustering
# Tacc3leblanc.mtx <- exp.matFINAL[rownames(exp.matFINAL) %in% TACC3list$rownames.leblanc.val.,]
#
# #Annotating columns with Cell Cycle Phase
# ha=HeatmapAnnotation(Phase=leblanc$Phase, col=list(Phase=c("S"="lightgreen","G2M"="lightblue","G1"="orange")))
#
# #Constructing a ComplexHetamp
# ht_list = Heatmap(data.matrix(Tacc3leblanc.mtx), col = colorRamp2(c(-1.5, 0, 1.5), c("blue", "white", "red")),border = "white" ,name = "scaled_expr",row_title = "Clustering of TACC3 related genes from SOM",show_column_names = FALSE,show_row_names = TRUE ,top_annotation = ha,row_names_max_width = unit(3, "cm"),width = unit(10, "cm"),na_col = "black",column_km =3,gap=unit(10,"mm"),row_names_gp = gpar(fontsize = 10),heatmap_legend_param = list(title = "Scaled expression",use_raster = TRUE))
# #save.image("/rds/projects/g/gendood-preclinomics/Bioinfo_Module_6_Group_Project/som_ht_neftel.RData")
#
# #View the heatmap
# ht_list